Use of the heatmaply package with SCE objects

Explains how to generate an interactive heatmap from a SingleCellExperiment object Includes a summarizeHeatmap function that calculates the median counts by channel and by cluster (or any other variable in colData(sce)).
For more information about heatmaply, see the heatmaply vignette and the original heatmaply publication.

Requirements

  • Packages: SingleCellExperiment, heatmaply, data.table.
  • An SCE object with the channels as rownames(sce).

Load the librairies

library(SingleCellExperiment)
library(heatmaply)

Load example SCE

Contains a subset of the pancreas dataset

fn.sce <- file.path('..', '..', '..', 'data', 'pancreas_example_sce.rds')
sce <- readRDS(fn.sce)
sce
## class: SingleCellExperiment 
## dim: 38 12274 
## metadata(0):
## assays(1): counts
## rownames(38): H3 SMA ... Ir191 Ir193
## rowData names(3): channel metal target
## colnames(12274): E34_1 E34_2 ... J02_4952 J02_4953
## colData names(21): slide id ... Age Gender
## reducedDimNames(0):
## spikeNames(0):
## altExpNames(0):

Function to summarize the data

Returns median counts by cluster and by channel. The arguments are the following:
sce = A SingleCellExperiment object.
expr_values = A string corresponding to an assay in the sce, should be in names(sce).
cluster_by = Name of the column containing the clusters.
channels= channels to include, should be in rownames(sce). If NULL, all channels will be summarized.

summarizeHeatmap <- function(sce, expr_values, cluster_by, channels=NULL){
  require(data.table)

  # Argument checks
  if(is.null(expr_values)){
    expr_values <- names(assays(sce))[1]
    print(paste0("Warning: Assay type not provided, '", expr_values, "' used."))
  }

  if(is.null(channels[1])){
    channels <- rownames(sce)
  }

  if(!all(channels %in% rownames(sce))){
    stop("Channel names do not correspond to the rownames of the assay")
  }

  if(is.null(cluster_by)){
    stop("Cluster column not provided")
  }

  if(!cluster_by %in% colnames(colData(sce))){
    stop("The cluster_by argument should correspond to a colData(sce) column")
  }

  # Convert the data to a melted format
  dat <- as.data.table(t(assay(sce, expr_values)[channels,]))
  dat[, id := colnames(sce)]
  dat[, cluster := colData(sce)[, cluster_by]]
  dat <- melt.data.table(dat, id.vars=c('id','cluster'), variable.name='channel', value.name=expr_values)

  # Summarize the data
  dat.summary <- dat[, list(
    median.val = median(get(expr_values)),
    mean.val = mean(get(expr_values)),
    cellspercluster = .N),
    by = c('channel', 'cluster')]

  # Decast the summarized data and convert to a matrix. Currently only the median values are returned
  hm.cell <- dcast.data.table(dat.summary, formula='cluster ~ channel', value.var='median.val')
  hm.clusters <- hm.cell$cluster
  hm.cell <- as.matrix(hm.cell[, -1, with=FALSE])
  
  # Add rownames
  rownames(hm.cell) <- hm.clusters

  # Return the median values
  return(as.matrix(hm.cell))
}

Run the summarizeHeatmap function

# Select the relevant channels
good.channels <- rownames(sce)[! rownames(sce) %in% c('H3', 'PD-1', 'cPARP', 'Ir191', 'Ir193')]

hm <- summarizeHeatmap(sce,
                       expr_values = 'counts',
                       cluster = 'CellType',
                       channels = good.channels)

Expression heatmap

Displays normalized, scaled or percentized counts per cluster and channel
Normalize: https://cran.r-project.org/web/packages/heatmaply/vignettes/heatmaply.html#normalize
Scale: https://cran.r-project.org/web/packages/heatmaply/vignettes/heatmaply.html#scale
Percentize: https://cran.r-project.org/web/packages/heatmaply/vignettes/heatmaply.html#percentize

# Normalize
heatmaply(heatmaply::normalize(hm))
# Scale
heatmaply(hm, scale="column")
# Percentize
heatmaply(heatmaply::percentize(hm))

Using asinh-transformed counts

# Caculate transformed counts
assay(sce, "exprs") <- asinh(counts(sce) / 1)

# Summarize the data
hm.exprs <- summarizeHeatmap(sce,
                             expr_values = 'exprs',
                             cluster = 'CellType',
                             channels = good.channels)

# Plot the heatmap
heatmaply(heatmaply::normalize(hm.exprs))

Heatmap with other variables

Any other variable in colData(sce) can be passed to summarizeHeatmap.
Example with donors (case).

# Summarize the data
hm.cases <- summarizeHeatmap(sce,
                             expr_values = 'counts',
                             cluster = 'case',
                             channels = good.channels)

# Plot the heatmap
heatmaply(heatmaply::normalize(hm.cases))

Add row-side colors

Here, row side colors are defined in function of the cell types and cell categories (defined in colData(sce)$CellType and colData(sce)$CellCat).

# Get cell types and cell categories
rsc <- unique(data.frame(`Cell type`= colData(sce)$CellType,
                         `Cell category` = colData(sce)$CellCat))

# Plot the heatmap
heatmaply(heatmaply::normalize(hm), RowSideColors = rsc)

Correlation heatmap

Displays correlation between channels

heatmaply_cor(cor(hm))

Correlation heatmap with point size corresponding to the p-value of the correlation test

https://cran.r-project.org/web/packages/heatmaply/vignettes/heatmaply.html#correlation-heatmaps

r <- cor(hm)

cor.test.p <- function(x){
    FUN <- function(x, y) cor.test(x, y)[["p.value"]]
    z <- outer(
      colnames(x), 
      colnames(x), 
      Vectorize(function(i,j) FUN(x[,i], x[,j]))
    )
    dimnames(z) <- list(colnames(x), colnames(x))
    z
}
p <- cor.test.p(hm)

heatmaply_cor(
  r,
  node_type = "scatter",
  point_size_mat = -log10(p), 
  point_size_name = "-log10(p-value)",
  label_names = c("x", "y", "Correlation")
)

Generate a static heatmap

ggheatmap(heatmaply::normalize(hm))

Save the heatmap

The file argument can be provided to save the heatmap as an html file

# Define the name of the output html file
fn.hm <- file.path('./', "saved-heatmap.html")

# Save the heatmap to html
heatmaply(heatmaply::normalize(hm), file=fn.hm)